We begin by limiting the DMO data to conventional gilts only and sanitising the UTF-8 encoding (£ symbol and fraction characters in Gilt Names). Also be wary of Excel/Python interpreting 2-digit years according to the POSIX and ISO C standards: values 69–99 map to 1969–1999 and values 0–68 map to 2000–2068 (https://docs.python.org/3/library/time.html) — so explicitly set all years to 4 digits, especially for far-dated maturities, e.g. 50y.
Then compute Time to Maturity in days and years as time between Redemption date and Settlement date:
from pandas.io.excel import read_excel
import pandas as pd
import numpy as np
from scipy import optimize
# Parse DMO dates explicitly as day/month/4-digit-year so 2-digit years can
# never be misinterpreted. pd.datetime is deprecated (removed in pandas >= 2.0);
# pd.to_datetime with an explicit format is the supported spelling.
dateparse = lambda x: pd.to_datetime(x, format='%d/%m/%Y')
DMOdata = pd.read_csv(r'data/GILTS2012ToPresent.csv', parse_dates=['Close of Business Date', 'Redemption Date'], date_parser=dateparse)
dmo_ix = DMOdata.set_index('Close of Business Date')
# Time to maturity = Redemption date minus settlement (Close of Business) date.
# Vectorised TimedeltaIndex arithmetic replaces the per-row Python loop.
dmo_ix['DaysToMaturity'] = (pd.DatetimeIndex(dmo_ix['Redemption Date']) - dmo_ix.index).days
dmo_ix['YearsToMaturity'] = dmo_ix['DaysToMaturity']/365
# Coupon is the number preceding '%' in the gilt name, e.g. "4.25% Treasury Gilt 2032".
dmo_ix['Coupon'] = dmo_ix['Gilt Name'].apply(lambda x: float(x.split('%')[0]))
##drop dates with suspect data. Note drop() expects index *labels*; the
##original passed dmo_ix.ix[...] (the selected rows themselves), and .ix is
##removed in modern pandas.
dmo_ix.drop(pd.to_datetime('2013-04-03'), inplace=True)
#'2014-03-05' is another candidate suspect date
Yield curve data is calibrated to the Nelson-Siegel-Svensson (NSS) model, and yield surfaces are generated for the raw data, the fitted models and the residuals.
def NSS_optfit(T, beta_0, beta_1, beta_2, beta_3, tau_1, tau_2):
    """Nelson-Siegel-Svensson yield at maturity T (years).

    beta_0 is the long-run level, beta_1 the short-end slope, and
    beta_2/beta_3 the magnitudes of the two humps whose decay scales are
    tau_1 and tau_2 respectively. T may be a scalar or a numpy array.
    """
    x1 = T / tau_1
    x2 = T / tau_2
    slope_loading = (1 - np.exp(-x1)) / x1
    hump1_loading = slope_loading - np.exp(-x1)
    hump2_loading = (1 - np.exp(-x2)) / x2 - np.exp(-x2)
    return beta_0 + beta_1 * slope_loading + beta_2 * hump1_loading + beta_3 * hump2_loading
NSSparams = {}   # date -> fitted [beta_0, beta_1, beta_2, beta_3, tau_1, tau_2]
NSSmodel = []    # fitted curve per successfully calibrated date
yieldcurves = [] # raw pivoted curve per successfully calibrated date
residuals = []   # fitted minus observed yields per date
##can equivalently do this using "apply" across the dataframe, the explicit
##loop is just easier to debug when individual dates fail
for idx in dmo_ix.index.unique():
    try:
        data = dmo_ix.loc[idx].pivot(values='Yield (%)', columns='YearsToMaturity')
    except Exception:
        print('data yield curve pivot problem on this date', idx)
        continue  # without this, the previous date's pivot would be reused
    xdata, ydata = data.columns.values, data.values[0]
    try:
        # Only 'optimize' was imported (from scipy import optimize); the bare
        # name 'scipy' is unbound, so scipy.optimize.curve_fit raised NameError.
        popt, pcov = optimize.curve_fit(NSS_optfit, xdata, ydata, maxfev=10000)
    except Exception:
        print('curve_fitting maxfev problem on this date', idx)
        continue  # skip dates that fail to calibrate
    y = NSS_optfit(xdata, *popt)
    NSSparams[idx] = popt
    yieldcurves.append(data)
    NSSmodel.append(y)
    residuals.append(y - data.values[0])
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
import plotly.graph_objs as go
from plotly.graph_objs import *
init_notebook_mode()
# Coordinates shared by the raw / fitted / residual surfaces:
# maturities along x, business dates along y.
surface_x = [i.columns.values for i in yieldcurves]
surface_y = [pd.to_datetime(i.index.values[0]) for i in yieldcurves]
raw_yld_data = [
    go.Surface(z=[i.values[0] for i in yieldcurves], x=surface_x, y=surface_y)
]
dataNSS = [
    go.Surface(z=NSSmodel, x=surface_x, y=surface_y)
]
residuals_data = [
    go.Surface(z=residuals, x=surface_x, y=surface_y)
]
# Parameter history surface: NSSparams is a dict keyed by date, so pass its
# values as rows in date order (the original passed the dict itself, which
# go.Surface cannot plot).
NSSparams_data = [
    go.Surface(
        z=[NSSparams[d] for d in sorted(NSSparams)],
        x=['beta_0', 'beta_1', 'beta_2', 'beta_3', 'tau_1', 'tau_2'],
        y=[pd.to_datetime(d) for d in sorted(NSSparams)]
    )
]
layout = go.Layout(
    title='Yields surface',
    width=1024,
    height=768,
    # Plain dicts replace the deprecated Scene/XAxis/YAxis/ZAxis classes
    # pulled in by the star import; both plotly generations accept dicts.
    scene=dict(
        xaxis=dict(title='Maturity (years)'),
        yaxis=dict(title='Close of Business date'),
        zaxis=dict(title='Yield (%)')
    )
)
fig = go.Figure(data=raw_yld_data, layout=layout)
iplot(fig)
fig_NSS = go.Figure(data=dataNSS, layout=layout)
iplot(fig_NSS)
fig_residuals = go.Figure(data=residuals_data, layout=layout)
iplot(fig_residuals)
The yields and residuals surfaces above indicate data anomalies, notably for 2014-03-05, which exhibits a prominent hump. We suggest winsorizing the distribution to eliminate such outliers.
We briefly compare BOE nominal yield curves fitted using a VRP methodology against our NSS model fit. Chiefly, we are interested in the difference between the forward and spot yields to estimate carry. We test an arbitrary date (2015-06-29) to demonstrate this:
from pandas.io.excel import read_excel
urls = [
    r'data\uknom05_mdaily.xls', #2005 to 2015 only
    #r'data\uknom16_mdaily.xlsx' #2016 to present
]
# Sheet indices in the BOE daily workbook: 8 = spot curve (5y+),
# 6 = short-end spot, 5 = forward curve (5y+), 4 = short-end forward.
# One pd.concat per sheet replaces the two near-identical loops that grew
# DataFrames with .append (deprecated/removed in modern pandas, and O(n^2)).
spot_curve = pd.concat([read_excel(f, sheet_name=8) for f in urls])
short_end_spot_curve = pd.concat([read_excel(f, sheet_name=6) for f in urls])
fwd_spot_curve = pd.concat([read_excel(f, sheet_name=5) for f in urls])
fwd_short_end_spot_curve = pd.concat([read_excel(f, sheet_name=4) for f in urls])
def _promote_year_headers(curve):
    """Use the workbook's 'years:' row as column labels and drop the first
    four metadata rows, leaving a date-indexed, maturity-columned frame."""
    curve = curve.copy()
    curve.columns = curve.loc['years:']
    curve.columns.name = 'years'
    return curve.loc[curve.index[4:]]

# The same header fix-up applied to all four raw sheets (deduplicates four
# identical copies of this code).
spot_curve = _promote_year_headers(spot_curve)
fwd_spot_curve = _promote_year_headers(fwd_spot_curve)
short_end_spot_curve = _promote_year_headers(short_end_spot_curve)
fwd_short_end_spot_curve = _promote_year_headers(fwd_short_end_spot_curve)

# Join short-end and 5y+ curves side by side, maturities sorted ascending.
combined_fwd_data = pd.concat([fwd_short_end_spot_curve, fwd_spot_curve], axis=1, join='outer')
combined_fwd_data.sort_index(axis=1, inplace=True)
combined_data = pd.concat([short_end_spot_curve, spot_curve], axis=1, join='outer')
combined_data.sort_index(axis=1, inplace=True)

def filter_func(group):
    """Keep only dates with at most 50 missing maturities."""
    return group.isnull().sum(axis=1) <= 50

combined_data = combined_data.groupby(level=0).filter(filter_func)
combined_fwd_data = combined_fwd_data.groupby(level=0).filter(filter_func)
from scipy.interpolate import interp1d
# Common monthly maturity grid from 1 month out to 25 years, in year units.
maturity = pd.Series((np.arange(12 * 25) + 1) / 12)
# NOTE(review): 'key' appears unused in this visible section — confirm before removing.
key = lambda x: x.date
# One group per business date (index level 0).
by_day = combined_data.groupby(level=0)
fwd_by_day = combined_fwd_data.groupby(level=0)
functDict = {} ## holds interpolated functions of the forward curve for each date so we can use it to estimate carry later on
##as BOE model has already been estimated by VRP we just do a simple interpolation rather than NSS
def interpolate_maturities(group, build_funct_dict=False):
    # Transpose so maturities become the row index; after dropna/reset_index,
    # column 0 holds the observed maturities and column 1 the yields.
    a = group.T.dropna().reset_index()
    # Cubic interpolation across observed tenors; bounds_error=False yields
    # NaN outside the observed range. assume_sorted relies on the earlier
    # sort_index(axis=1) of the combined data.
    f = interp1d(a.iloc[:, 0], a.iloc[:, 1], kind='cubic', bounds_error=False, assume_sorted=True)
    if build_funct_dict:
        # Cache the interpolator keyed by this group's date for later carry estimation.
        functDict[group.index.values[0]] = f
    # Evaluate on the shared monthly grid so every date has identical columns.
    return pd.Series(maturity.apply(f).values, index=maturity.values)
cleaned_fwd_spot_curve = fwd_by_day.apply(interpolate_maturities, build_funct_dict=True)
cleaned_spot_curve = by_day.apply(interpolate_maturities)
##Utility method to structure a butterfly portfolio
##Compute proportions of near and far maturities per unit position in the medium maturity so the butterfly is duration neutral:
def butterfly_position_solver(near, medium, far):
    """Solve for the near- and far-maturity position sizes per unit of the
    medium-maturity bond.

    Sets up a 2x2 linear system matching both the modified duration and the
    dirty price (cash outlay) of one unit of the medium bond, so the
    resulting butterfly is duration-neutral and self-financing.

    Each argument is a record exposing 'Modified Duration' and 'Dirty Price'.
    Returns a length-2 array [near_weight, far_weight].
    """
    durations = [float(bond['Modified Duration']) for bond in (near, medium, far)]
    prices = [float(bond['Dirty Price']) for bond in (near, medium, far)]
    wings = np.array([[durations[0], durations[2]],
                      [prices[0], prices[2]]])
    body = np.array([durations[1], prices[1]])
    return np.linalg.solve(wings, body)